/***********************************************************


This do file:

1. Takes linesectionlevel_data ratings data
2. Collapses at the line-level 
3. Shows correlations between (mean) ratings across all 
sections and line-level output 


***********************************************************/


* Session setup: turn off output paging and start from an empty workspace
set more off
clear all

* Memory segment size and default graph scheme
* (plotplainblind is not a built-in scheme; presumably installed from the
*  blindschemes package -- confirm it is available on the machine)
set segmentsize 3g
set scheme plotplainblind


* Read in the line-section-level dataset
use "$Data/Final/linesectionlevel_data.dta", clear

* Natural log of line-level output (ln() and log() are synonyms in Stata)
generate ln_output = ln(actual_output)

******************************************************
****** Showing that rating in LD and HD sections *****
******* have the same effects on line output *********
******************************************************

* Mean rating over sections with dependency == 1 (HD), one row per
* line/team/date cell, written to disk as rating_HD
preserve
keep if dependency == 1
collapse (mean) rating_HD = rating, by(line team Date int_days doy)
save "$Data/Original/HDsection_ratings.dta", replace
restore


* Mean rating over sections with dependency == 0 (LD), one row per
* line/team/date cell, written to disk as rating_LD
preserve
keep if dependency == 0
collapse (mean) rating_LD = rating, by(line team Date int_days doy)
save "$Data/Original/LDsection_ratings.dta", replace
restore

* Collapsing section ratings at the line-level (by dependency)
* The data in memory are restored at the end of this block, so the collapse
* only affects the figure produced here.
preserve

collapse (mean) ln_output rating (firstnm) date shift Shift Line, by(line team Date int_days doy dependency)
sort Line Date

* LD indicator: 1 for dependency == 0, 0 otherwise.
* Left missing when dependency is missing: a bare "dependency == 0" evaluates
* to 0 for missing values, which would silently put those cells in the HD group.
gen LD = dependency == 0 if !missing(dependency)

* Slopes, SEs and the p-value are stored as strings formatted to two decimals.
* Storing round(x, 0.01) in a local can leave binary floating-point residue
* (e.g. .5700000000000001) and drops leading/trailing zeros in the figure text.

// Regression for HD group 
quietly reg ln_output rating if LD == 0
local slope_hd = string(_b[rating], "%9.2f")
local se_hd = string(_se[rating], "%9.2f")

// Regression for LD group
quietly reg ln_output rating if LD == 1
local slope_ld = string(_b[rating], "%9.2f")
local se_ld = string(_se[rating], "%9.2f")

// Creating interaction variable
gen rating_ld = rating*LD

// Pooled regression with interaction term
quietly reg ln_output rating LD rating_ld

// Testing for the difference in slopes (H0: coefficient on the interaction is zero)
test rating_ld
local pval = string(r(p), "%9.2f")

// Creating final figure with scatter and smoothed lines, and slope, SE and p-value
// (legend keys 1 and 2 refer to the two scatter plots; text() positions are in axis units)
twoway (scatter ln_output rating if LD == 0, mcolor(blue)) (scatter ln_output rating if LD == 1, mcolor(red)) ///
   (lowess ln_output rating if LD == 0, lcolor(blue)) (lowess ln_output rating if LD == 1, lcolor(red) lpattern(dash)), ///
   xtitle("Mean Rating Across Sections") ytitle("Log Output") legend(order(1 "HD" 2 "LD")) ///
   text(5.5 2 "HD: Slope = `slope_hd', SE = `se_hd'", size(small) place(c)) ///
   text(5 2 "LD: Slope = `slope_ld', SE = `se_ld'", size(small) place(c)) ///
   text(4.5 2 "P-value (difference in slopes) = `pval'", size(small) place(c)) ///
   ylabel(0(3)15)


graph export "$Output/Figures/HDLDratings_LineOutput.pdf", replace  

restore 


*******************************************************
****** Showing that rating in Mx and NMx sections *****
******* have the same effects on line output **********
*******************************************************


* Collapsing section ratings at the line-level (by mixed status)
* The data in memory are restored at the end of this block, so the collapse
* only affects the figure produced here.
preserve

collapse (mean) ln_output rating (firstnm) date shift Shift Line, by(line team Date int_days doy mixed)
sort Line Date

* Slopes, SEs and the p-value are stored as strings formatted to two decimals.
* Storing round(x, 0.01) in a local can leave binary floating-point residue
* (e.g. .5700000000000001) and drops leading/trailing zeros in the figure text.

// Regression for Non-Mixed group
quietly reg ln_output rating if mixed == 0
local slope_nonmixed = string(_b[rating], "%9.2f")
local se_nonmixed = string(_se[rating], "%9.2f")

// Regression for Mixed group
quietly reg ln_output rating if mixed == 1
local slope_mixed = string(_b[rating], "%9.2f")
local se_mixed = string(_se[rating], "%9.2f")

// Creating interaction variable
gen rating_mixed = rating*mixed

// Pooled regression with interaction term
quietly reg ln_output rating mixed rating_mixed

// Testing for the difference in slopes (H0: coefficient on the interaction is zero)
test rating_mixed
local pval = string(r(p), "%9.2f")

// Creating final figure with scatter and smoothed lines, and slope, SE and p-value
// (legend keys 1 and 2 refer to the two scatter plots; text() positions are in axis units)
tw (scatter ln_output rating if mixed == 0, mcolor(blue)) (scatter ln_output rating if mixed == 1, mcolor(red)) ///
   (lowess ln_output rating if mixed == 0,  lcolor(blue)) (lowess ln_output rating if mixed == 1,  lcolor(red) lpattern(dash)), ///
   xtitle("Mean Rating Across Sections") ytitle("Log Output") legend(order(1 "Non-Mixed" 2 "Mixed")) ///
   text(5.5 2 "Non-Mixed: Slope = `slope_nonmixed', SE = `se_nonmixed'", size(small) place(c)) ///
   text(5 2 "Mixed: Slope = `slope_mixed', SE = `se_mixed'", size(small) place(c)) ///
   text(4.5 2 "P-value (difference in slopes) = `pval'", size(small) place(c)) ///
   ylabel(0(3)15)


graph export "$Output/Figures/MNMratings_LineOutput.pdf", replace  

restore 

* Aggregate to one row per line/team/date cell: means of the output and rating
* variables, first non-missing value of the identifier/date variables.
* No preserve here: the collapsed data stay in memory for the rest of the file.
collapse (mean)    ln_output productionincfc actual_output rating rating_dum ///
         (firstnm) date shift Shift Line,                                    ///
         by(line team Date int_days doy)
sort Line Date



* Binned scatter (50 quantile bins) of log output on the mean section rating,
* absorbing line fixed effects
* (binscatter is a user-written command; the graph is displayed but not exported here)
binscatter ln_output rating, nquantiles(50) absorb(line) ///
    xtitle("Mean Rating Across all Sections") ytitle("Log Output")


* Histograms of log output and section ratings (line-level data)
* Each histogram is saved as a temporary .gph file, the two are combined into
* one figure that is exported as PDF, and the temporary files are then erased.
* All paths are quoted, as elsewhere in this file, so that the commands still
* work when $Output expands to a directory name containing spaces.
hist ln_output, xtitle("Log Output (Line-level)")
graph save "$Output/Figures/hist_lnoutput.gph", replace

hist rating, xtitle("Line-section-level ratings (Aggregated to Line-Level)")
graph save "$Output/Figures/hist_ratings.gph", replace

gr combine "$Output/Figures/hist_lnoutput.gph" "$Output/Figures/hist_ratings.gph"
graph export "$Output/Figures/Line_Section_Comp.pdf", replace

* Erase individual graphs from folder
erase "$Output/Figures/hist_ratings.gph"
erase "$Output/Figures/hist_lnoutput.gph"

